library(stm)
library(openxlsx)
library(ggplot2)
library(gridExtra)
library(grid)
library(dplyr)
library(data.table)
library(lubridate)
library(zoo)
library(lmtest)
library(mgcv)
library(rdrobust)
library(arm)
library(stargazer)


# Select optimal number of knots

selectK <- function(fit){
  f <- function(k){
    K.star <<- k
    update(fit)
  }
  out <- lapply(1:10, f)     
  out[[which.min(unlist(lapply(out, BIC)))]]
}


#### rescale to 0, 1
range01 <- function(x){
  if(length(x)==1) return(x)
  (x-min(x, na.rm = TRUE))/(max(x, na.rm = TRUE)-min(x, na.rm = TRUE))
}

#### To update Felm Functions
G <- function(x) { x[[3]] <- x[[3]][[2]]; x}


#### MAKE LAGS BASED ON FACTORS

lag <- function(x, s, factor=NULL){
  lagtemp <- function(x,s) {
    if(length(x) > s)  M = c(rep(NA, length=s), rev(rev(x)[-(1:s)]))
    if(length(x) <= s) M = rep(NA, length=length(x))
    M
  }
  lagtemp2 <- function(x, s, factor){
    if (is.null(factor)) return(lagtemp(x, s))
    if (!is.null(factor)) return(unlist(tapply(x, factor(factor), lagtemp, s=s)))
  }
  sapply(1:s, lagtemp2, x=x, factor=factor)
}


## Compute bootstrapped clusterres SE's
boot.cluster <- function(fit, var, M = 1000, wild = TRUE){
  
  if(wild == TRUE){
    
    K.star <<- sum(grepl("K.star", names(coef(fit))))
    ofit <- update(fit, na.action = na.omit)
    if(class(fit)[1]%in%c("gam")) d <<- ofit$data
    if(class(fit)[1]%in%c("lm"))  d <<- eval(ofit$call$data)
    d <- d[rownames(model.frame(ofit)), ]
    cluster <- d[, var]
    
    f <- function(){
      e <- sample(c(-1, 1), length(unique(cluster)), replace = TRUE)[factor(cluster)]
      d$y <- predict(ofit) + e*residuals(ofit)
      out <- update(ofit, y ~ ., data = d)
      b  <- coef(out)
      se <- sqrt(diag(vcov(out)))
      cbind(b = b, se = se)
    }
    
    out <- replicate(M, f())
    se <- apply(out, 1:2, sd)[, 1]
    #B  <- matrix(coef(fit), nrow = length(coef(fit)), ncol = M, byrow = FALSE)
    # Bootsrapped Wald statistic
    #w  <- ( out[, 1, ] - B)/ out[, 2, ] 
    # Observed Wald statistic
    #W  <- matrix(coef(fit)/sqrt(diag(vcov(fit))), nrow = length(coef(fit)), ncol = M, byrow = FALSE)
    # P-value (symmetric)
    #p <- apply(abs(w) > abs(W), 1, mean)
    t <- coef(fit)/se
    p <- 2*pt(abs(t), df = length(unique(d[, var])) - 1, lower.tail=FALSE)
    
    out <- cbind(b = coef(fit), se = se, t = t, p = p)
    
  }
  
  if (wild==FALSE){  
    
    var <- model.frame(fit)[, var]
    B <- R <- coeftest(fit)
    boot <- function(){
      cl <- sample(unique(var), length(unique(var)), replace = TRUE)
      w <- unlist(sapply(cl, function(w) which(var%in%w)))
      X <- model.matrix(fit)[w, ]
      y <- model.frame(fit)[w, 1]
      refit <- lm(y ~ -1 + X)
      refit <- coeftest(refit, vcov = vcov(refit, type = "HC0"))
      rownames(refit) <- substr(rownames(refit), 2, nchar(rownames(refit)))
      d <- rownames(B)%in%rownames(refit)
      R[d, ] <- refit
      R[!d, ] <- NA
      R
    }
    R  <- replicate(boots, boot())
    se <- apply(R[,1,], 1, sd, na.rm = TRUE)
    p  <- 2*pt(abs(B[,1]/se), df = length(unique(var)), lower.tail = FALSE)
    out <- list(se = se, p = p)
  }
  
  out
  
}

##### Combine results from regressions

outtex <- function(models, below = TRUE, ci = FALSE, ci.approximate = FALSE, omit = NULL, only = NULL, single.var = FALSE, caption = NULL, label = NULL, var.labels = NULL, model.names = NULL, col.names = "", table.placement = 't!', space = "50pt", robust = FALSE, size = "normalsize", file = "", cluster = NULL, boots = NULL, output = NULL, summary = FALSE){
  
  require(multiwayvcov, quietly = TRUE)
  require(lmtest, quietly = TRUE)
  require(xtable, quietly = TRUE)
  
  outr <- function(model, below, ci, omit, only, single.var){
    
    if(!summary) output <- data.frame(coefficients(summary(model)))
    if(summary)  output <- model
    
    if(class(model)[1]%in%c("rlm", "gam", "bayesglm")) output <- coeftest(model)
    
    if (!is.null(omit)) vars <- which(!grepl(paste(omit, collapse="|"), rownames(output)))
    
    if (!is.null(only)) vars <- grep(paste(only, collapse="|"), rownames(output))
    
    if(robust) output <- coeftest(model,  vcov=function(x) vcovHC(x, type="HC1"))
    
    if(!is.null(cluster)) {
      if(is.null(boots))   output <- coeftest(model, vcov. =  cluster.vcov(model, cluster = as.numeric(cluster)))
      if(!is.null(boots)){
        output <- boot.cluster(model, cluster, M = boots)[rownames(output), ]
      }
    }
    
    varnames <- rownames(output)[vars]
    out  <- matrix(output[vars, ], nrow = length(vars), ncol = 4)
    if("felm"%in%class(model)) out  <- as.matrix(output[vars, ], nrow = length(vars), ncol = 4)
    rownames(out) <- varnames 
    
    if(class(model)[1]%in%c("lmerMod", "glmerMod")) out$p <- 2*pnorm(abs(out[, 3]), lower.tail = FALSE)
    
    r <- out[, 1:2]
    p <- out[, 4]
    p <- ifelse(p < 0.001, "^{***}", ifelse(p < 0.01, "^{**}", ifelse(p < 0.05, "^{*}", "")))
    se <- paste("(", roundr(out[, 2], 2), ")", sep="")
    
    if (ci == TRUE) {
      if(ci.approximate) ci <- roundr(cbind(out[,1]-1.96*output[,2], out[,1]+1.96*out[,2]))
      
      if(!ci.approximate) ci <- roundr(confint(model, par = varnames))
      se <- paste("(", ci[,1], ", ", ci[,2], ")" , sep = "")
      p <- ""
    }
    
    m <- data.frame(names = varnames, b = paste(roundr(out[,1]), p, sep=""), se = se, stringsAsFactors = FALSE)
    
    if(single.var==TRUE) return(m)
    
    if (class(model)[1]%in%c("lm", "lmrob", "felm")) {
      r2 <- roundr(summary(model)$adj.r.squared, 2)
      names(r2) <- "Adj. $R^2$"
    }
    
    if (class(model)[1]%in%c("felm")) {
      r2 <- roundr(summary(model)$adj.r.squared, 2)
      names(r2) <- "Adj. $R^2$"
    }
    
    if (class(model)[1]%in%c("ivreg")) {
      r2 <- ""
      names(r2) <- "First-stage F-statistic"
    }
    
    if (class(model)[1]%in%c("plm")) {
      r2 <- roundr(summary(model)$r.squared[2])
      names(r2) <- "Adj. $R^2$"
    }
    
    if(class(model)[1]%in%c("lmerMod")) {
      r2 <- roundr(cor(predict(model), model.frame(model)[,1])^2, 2)
      names(r2) <- "Adj. $R^2$"
    }
    
    if (class(model)[1]%in%c("glm", "glmerMod")){
      r2 <- prettyNum(roundr(logLik(model), 0), big.mark=",", preserve.width="none")
      names(r2) <- "Log-likelihood"
    }
    
    if (class(model)[1]%in%c("gam", "rlm")) {
      r2 <- roundr(cor(model$fitted.values, model.frame(model)[,1])^2,2)
      names(r2) <- "Adj. $R^2$"
    }
    
    if (class(model)[1] == "felm") n <- paste("$", prettyNum(nrow(model$X), big.mark=",", preserve.width="none"), "$", sep = "")
    if (class(model)[1] != "felm") n <- paste("$", prettyNum(nrow(model.frame(model)), big.mark=",", preserve.width="none"), "$", sep = "")      
    
    if(below==FALSE) { 
      if(single.var == TRUE)  m <- data.frame(m, r2 = r2, n = n)
      if(single.var == FALSE) {
        fit <- c(names(r2), paste("\\multicolumn{1}{r}{", r2, "}", sep = ""), "")
        n <- c("Observations", paste("\\multicolumn{1}{r}{", n, "}", sep = ""), "")
        m <- rbind(m, fit, n)
        m <- data.frame(m)
      }}
    
    if(below==TRUE)  {
      
      r2 <- paste("\\multicolumn{1}{d}{", r2, "}", sep = "")
      n <- paste("\\multicolumn{1}{c}{", n, "}", sep = "")  
      est <- c(t(m[,-1]))
      est <- as.matrix(c(est, r2, n))
      names <- c(t(cbind(m$names, paste(m$names, "se", sep = "-"))))
      names <- c(names, "R2", "N")
      names[1] <- "Intercept"
      if (class(model)[1]=="glm") names[which(names=="R2")] <- "Log-Likelihood"
      
      m <- data.frame(names = names, est = est, stringsAsFactors = FALSE)
    }
    
    list(m = m, varnames = varnames)
  }
  
  rr <- lapply(models, outr, ci = ci, below = below, omit = omit, only = only, single.var = single.var)
  if(single.var) return(rr)
  varnames <- unique(unlist(lapply(rr, function(x) x$varnames)))
  rr <- lapply(rr, function(x) x$m)
  m <- suppressWarnings(Reduce(function(...) merge(..., by = "names", all=TRUE), rr))
  rownames(m) <- m$names
  
  if (!below) {
    sort.names <- c(varnames, setdiff(Reduce(union, lapply(rr, function(x) x$names)), varnames))
    m <- m[sort.names, ]
    if(!is.null(var.labels)) {
      m[varnames, "names"] <- var.labels
    }
    
    if(is.null(model.names)) model.names <- paste(col.names, 1:((ncol(m)-1)/2))
    
    m
    
  }
  
  if (below == TRUE){
    
    colnames(m) <- c("names", paste("Model", 1:length(rr), sep = " "))
    
    sort.names <- unique(unlist(lapply(rr, function(x) x$names)))
    
    # Put the stats at the end of the table
    t <- which(sort.names%in%c("R2", "N"))
    sort.names <- c(sort.names[-t], "R2", "N")
    
    m <- m[sort.names, ]
    
    t <- which(grepl("-se", m$names))
    m$names[t] <- ""
    
    m$names[-t]
    
    # Variable names/labels
    m$names[1:(2*length(var.labels))] <- c(t(cbind(var.labels, "")))
  }
  
  m[rownames(m)!="NA", ]
  
}


#### rescale to 0, 1
range01 <- function(x){
  if(length(x)==1) return(x)
  (x-min(x, na.rm = TRUE))/(max(x, na.rm = TRUE)-min(x, na.rm = TRUE))
}

#### strsplit and retain the n'th element

strsplitm <- function(x, w, n) unlist(lapply(strsplit(x, w), function(x) x[n]))
strsplitmn <- function(x, w, n) ifelse(grepl(
  w, x), strsplitm(x, w, n), "")

#### CENTER VARIABLES

cntr <- function(x){x-mean(x,na.rm=TRUE)}

#### MAKE LAGS BASED ON FACTORS

lag <- function(x, s, factor=NULL){
  lagtemp <- function(x,s) {
    if(length(x) > s)  M = c(rep(NA, length=s), rev(rev(x)[-(1:s)]))
    if(length(x) <= s) M = rep(NA, length=length(x))
    M
  }
  lagtemp2 <- function(x, s, factor){
    if (is.null(factor)) return(lagtemp(x, s))
    if (!is.null(factor)) return(unlist(tapply(x, factor(factor), lagtemp, s=s)))
  }
  sapply(1:s, lagtemp2, x=x, factor=factor)
}

lead <- function(x, s, factor=NULL){
  lagtemp <- function(x,s) {
    if(length(x) > s)  M = c(x[-(1:s)], rep(NA, length=s))
    if(length(x) <= s) M = rep(NA, length=length(x))
    M
  }
  lagtemp2 <- function(x, s, factor){
    if (is.null(factor)) return(lagtemp(x, s))
    if (!is.null(factor)) return(unlist(tapply(x, factor, lagtemp, s=s)))
  }
  sapply(1:s, lagtemp2, x=x, factor=factor)
}


##### Combine results from regressions

outtex <- function(models, below = TRUE, ci = FALSE, ci.approximate = FALSE, omit = NULL, only = NULL, single.var = FALSE, caption = NULL, label = NULL, var.labels = NULL, model.names = NULL, col.names = "", table.placement = 't!', space = "50pt", robust = FALSE, size = "normalsize", file = ""){
  
  outr <- function(model, below, ci, omit, only, single.var){
    
    if (class(model)[1]%in%c("gam", "gamm")) output <- data.frame(summary(model)$p.table)
    if (!class(model)[1]%in%c("gam", "gamm")) output <- data.frame(coefficients(summary(model)))
    
    if (!is.null(omit)) vars <- which(!grepl(paste(omit, collapse="|"), rownames(output)))
    
    if (!is.null(only)) vars <- which(grepl(paste(omit, collapse="|"), rownames(output)))
    
    if(robust) output <- coeftest(model,  vcov=function(x) vcovHC(x, type="HC1"))
    
    output  <- output[vars, ]
    varnames <- rownames(output)
    
    if(class(model)[1]%in%c("lmerMod", "glmerMod")) output$p <- 2*pnorm(abs(output[, 3]), lower.tail = FALSE)
    
    r <- output[, 1:2]
    p <- output[, 4]
    p <- ifelse(p < 0.001, "***", ifelse(p < 0.01, "^{**}", ifelse(p < 0.05, "^{*}", "")))
    se <- paste("(", roundr(r[,2], 2), ")", sep="")
    
    if (ci == TRUE) {
      if(ci.approximate) ci <- roundr(cbind(output[,1]-1.96*output[,2], output[,1]+1.96*output[,2]))
      
      if(!ci.approximate) ci <- roundr(confint(model, par = varnames))
      se <- paste("(", ci[,1], ", ", ci[,2], ")" , sep = "")
      p <- ""
    }
    
    m <- data.frame(names = varnames, b = paste(roundr(r[,1]), p, sep=""), se = se, stringsAsFactors = FALSE)
    
    
    
    if (class(model)[1]%in%c("lm", "lmrob")) {
      n <- prettyNum(roundr(nrow(model.frame(model)), 0), big.mark=",", preserve.width="none")
      r2 <- roundr(summary(model)$adj.r.squared, 2)
      names(r2) <- "Adj. $R^2$"
    }
    
    
    if (class(model)[1]%in%c("gam")) {
      n <- prettyNum(roundr(nrow(model.frame(model)), 0), big.mark=",", preserve.width="none")
      r2 <- roundr(summary(model)$r.sq, 2)
      names(r2) <- "Adj. $R^2$"
    }
    
    
    if (class(model)[1]%in%c("plm")) {
      n <- prettyNum(roundr(nrow(model.frame(model)), 0), big.mark=",", preserve.width="none")
      r2 <- roundr(summary(model)$r.squared[2])
      names(r2) <- "Adj. $R^2$"
    }
    
    if(class(model)[1]%in%c("lmerMod")) {
      n <- prettyNum(roundr(nrow(model.frame(model)), 0), big.mark=",", preserve.width="none")
      r2 <- roundr(cor(predict(model), model.frame(model)[,1])^2, 2)
      names(r2) <- "Adj. $R^2$"
    }
    
    if (class(model)[1]%in%c("glm", "glmerMod")){
      n <- prettyNum(roundr(nrow(model.frame(model)), 0), big.mark=",", preserve.width="none")
      r2 <- prettyNum(roundr(logLik(model), 0), big.mark=",", preserve.width="none")
      names(r2) <- "Log-likelihood"
    }
    
    if (class(model)[1]=="rlm") {
      n <- prettyNum(roundr(nrow(model.frame(model)), 0), big.mark=",", preserve.width="none")
      r2 <- roundr(cor(predict(model), model.frame(model)[,1])^2,2)
      names(r2) <- "Adj. $R^2$"
    }
    
    if (class(model)[1]=="logitmfx"){
      n <- ""
      r2 <- ""
      names(r2) <- "Log-likelihood"
    }
    
    if(below==FALSE) { 
      if(single.var == TRUE)  m <- data.frame(m, r2 = r2, n = n)
      if(single.var == FALSE) {
        fit <- c(names(r2), paste("\\multicolumn{1}{r}{", r2, "}", sep = ""), "")
        n <- c("Observations", paste("\\multicolumn{1}{r}{", n, "}", sep = ""), "")
        m <- rbind(m, fit, n)
        m <- data.frame(m)
      }}
    
    if(below==TRUE)  {
      
      r2 <- paste("\\multicolumn{1}{d}{", r2, "}", sep = "")
      n <- paste("\\multicolumn{1}{c}{", n, "}", sep = "")  
      est <- c(t(m[,-1]))
      est <- as.matrix(c(est, r2, n))
      names <- c(t(cbind(m$names, paste(m$names, "se", sep = "-"))))
      names <- c(names, "R2", "N")
      if (class(model)[1]%in%c("glm", "logitmfx")) names[which(names=="R2")] <- "Log-Likelihood"
      
      m <- data.frame(names = names, est = est, stringsAsFactors = FALSE)
    }
    
    list(m = m, varnames = varnames)
  }
  
  rr <- lapply(models, outr, ci = ci, below = below, omit = omit, only = only, single.var = single.var)
  varnames <- unique(unlist(lapply(rr, function(x) x$varnames)))
  rr <- lapply(rr, function(x) x$m)
  m <- suppressWarnings(Reduce(function(...) merge(..., by = "names", all=TRUE), rr))
  rownames(m) <- m$names
  sort.names <- c(varnames, setdiff(Reduce(union, lapply(rr, function(x) x$names)), varnames))
  
  #sort.names <- c(sort.names, rr[[1]]$names[-(1:(length(x$names)-2))])
  
  if (!below) {
    m <- m[sort.names, ]
    if(!is.null(var.labels)) {
      m[varnames, "names"] <- var.labels
    }
    if(is.null(model.names)) model.names <- paste(col.names, 1:((ncol(m)-1)/2))
    return(
      print(xtable(m, 
                   caption = caption,
                   label = label,
                   align = c("l@{\\hskip 5pt}", "l", paste(rep("d", ncol(m)-2), rep(c("@{\\hskip -10pt}", paste("@{\\hskip ", space, "  }", sep ="")), (ncol(m)-1)/3), sep = ""), "d@{\\hskip 20pt}")), 
            sanitize.text.function = identity,
            size = size,
            file = file,
            hline.after = NULL,
            include.rownames = FALSE, 
            include.colnames = FALSE,
            table.placement = table.placement,
            caption.placement = 'bottom',
            add.to.row = list(pos = as.list(c(-1, 0, 0, nrow(m)-2, nrow(m), nrow(m))), 
                              command = c("\\toprule\n", paste("&", paste(paste("\\multicolumn{2}{c}{", model.names, "}", sep = ""), collapse = "&"), "\\\\\n",collapse=""), "\\midrule\n", "\\midrule\n", "\\bottomrule\n", ifelse(ci, paste("\\multicolumn{", ncol(m), "}{l}{{\\footnotesize Note: 95 percent confidence intervals in parentheses.\n}}", sep = "") , paste("\\multicolumn{", ncol(m), "}{l}{{\\footnotesize Note:", ifelse(robust, " Robust standard ", " Standard "),"errors in parentheses; $^{*}$p$<$0.05; $^{**}$p$<$0.01; $^{\\dagger}$p$<$0.001.\n}}", sep = ""))))
      ))
  }
  
  if (below == TRUE){
    
    colnames(m) <- c("names", paste("Model", 1:length(rr), sep = " "))
    
    # Put the stats at the end of the table
    t <- which(sort.names%in%c("R2", "N"))
    sort.names <- c(sort.names[-t], "R2", "N")
    
    m <- m[sort.names, ]
    
    t <- which(grepl("-se", m$names))
    m$names[t] <- ""
    
    m$names[-t]
    
    # Variable names/labels
    
  }
  m[rownames(m)!="NA", ]
  
}


##### polynomial

pol=function(x,degree){
  x=(x-mean(x,na.rm=TRUE))/sd(x,na.rm=TRUE)
  return(sapply(1:degree,function(z) x^degree))
}


#### plot the fit of the model

plot.fit = function(x, y){
  plot(x, y, pch=16, cex=0.5)	
  data=data.frame(x=c(x), y=c(y))
  data=data[complete.cases(data),]
  fit=lm(y ~ x, data=data)
  abline(fit, col="blue", lwd=2)
  abline(0,1, col="red", lwd=2)	
  abline(h=c(round(min(data$y)):round(max(data$y))), lty=2, lwd=.75, col="grey")
  abline(v=c(round(min(data$x)):round(max(data$x))), lty=2, lwd=.75, col="grey")
  text(min(data$x)+sd(data$x), max(data$y) - sd(data$y),paste("MSE", round(summary(fit)$sigma/sd(data$y, na.rm=TRUE),2), sep=" = "))
}



m.model.matrix=function(formula, data, res=TRUE){
  if(res==TRUE) formula[[2]]=NULL
  M = model.matrix(formula,data)
  M=as.matrix(M)
  M=as.matrix(M[match(rownames(data),rownames(M)),])
  rownames(M)=rownames(data)
  return(M)
}


# plot estimates plus asymptotic confidence intervals

pplot <- function(x, conf=0.95){
  # x is a 2*P matrix, first row estimate, second row SE
  ci = cbind(x[1,]-qnorm((1-conf)/2)*x[2,], x[1,]+qnorm((1-conf)/2)*x[2,])
  plot(1:ncol(x), ylim=range(ci), ylab="")
  points(1:ncol(x), x[1,], pch=16)
  segments(1:ncol(x), ci[,1], 1:ncol(x), ci[,2])
}

z <- function(x) (x - mean(x, na.rm=TRUE))/sd(x, na.rm=TRUE)

is.odd <- function(x) round(x/2)!=(x/2)

logit <-  function(x) log(x/(1-x))

inv.logit <- function(x) 1/(1+exp(-x))

ciplot <- function(x, main=NULL) {
  x = as.matrix(x)
  ci = apply(x, 2, function(x) quantile(x, c(0.025, .5, 0.975)))
  plot(1:ncol(x), ci[2,], xlim=c(1,ncol(x)), ylim=c(min(c(x)), max(c(x))), pch=16, main=main) 
  abline(h=0, lty=3, col="grey")
  segments(1:ncol(x), ci[1, ], 1:ncol(x), ci[3,], lwd=2)
}

# 
# library( grid )     # for units
# simple_theme <- theme(    #   remove the gray background
#   panel.background    = element_blank() ,
#   #   make the major gridlines light gray and thin
#   panel.grid.major.y  = element_blank() ,
#   #   suppress the vertical grid lines
#   panel.grid.major.x  = element_blank() ,
#   #   suppress the minor grid lines
#   panel.grid.minor    = element_blank() ,
#   #   add axes
#   axis.line           = element_line( size=.2 , colour="#666666" ),
#   #   adjust the axis ticks
#   axis.ticks          = element_line( size=.2 , colour="#666666" ),
#   #   move the y-axis over to the left 
#   axis.title.y        = element_text( angle=90, vjust=-.1, hjust=.5 ),
#   #   increase fontsize for x-axis
#   axis.title.x        = element_text(size=16),
#   #   increase margin moved-over y-axis label fits
#   plot.margin = unit( c(0,0,0,0) , "in") 
# )


phtest_glmer <- function (Mod1, Mod2, ...)  {  ## changed function call
  coef.1 <- coefficients(summary(Mod1))[,1]
  coef.2 <- coefficients(summary(Mod2))[,1]  ## changed coef() to fixef() for glmer
  vcov.1 <- vcov(Mod1)
  vcov.2 <- vcov(Mod2)
  names.1 <- names(coef.1)
  names.2 <- names(coef.2)
  coef.h <- names.2[names.2 %in% names.1]
  dbeta <- coef.1[coef.h] - coef.2[coef.h]
  df <- length(dbeta)
  dvcov <- vcov.2[coef.h, coef.h] - vcov.1[coef.h, coef.h]
  stat <- abs(t(dbeta) %*% as.matrix(solve(dvcov)) %*% dbeta)  ## added as.matrix()
  pval <- pchisq(stat, df = df, lower.tail = FALSE)
  names(stat) <- "chisq"
  parameter <- df
  names(parameter) <- "df"
  alternative <- "one model is inconsistent"
  res <- list(statistic = stat, p.value = pval, parameter = parameter, 
              method = "Hausman Test",  alternative = alternative)
  class(res) <- "htest"
  res
}

roundr <- function(x, n = 2) {
  f <- function(x, n) sprintf(paste("%.", n, "f", sep=""), round(x, n))
  if (is.null(dim(x))) out <- f(x, n)
  if (!is.null(dim(x))) out <- apply(x, 2, f, n = n)
  out
}


construct.geocode.url <- function(address, return.call = "json", sensor = "false") {
  root <- "http://maps.google.com/maps/api/geocode/"
  u <- paste(root, return.call, "?address=", address, "&sensor=", sensor, sep = "")
  return(URLencode(u))
}

gGeoCode <- function(address,verbose=FALSE) {
  if(verbose) cat(address,"\n")
  u <- construct.geocode.url(address)
  doc <- getURL(u)
  x <- fromJSON(doc)
  if(x$status=="OK") {
    lat <- x$results[[1]]$geometry$location$lat
    lng <- x$results[[1]]$geometry$location$lng
    return(c(lat, lng))
  } else {
    return(c(NA,NA))
  }
}


n <- function(x, n) list(which = order(-x)[n], value = -sort(-x)[n])




# Function identifying n largest and smalles values in the variable:
outly <- function(x, N){
  if(n == 0) return(x)
  z <- c(order(x, decreasing = TRUE)[1:N], order(x, decreasing = FALSE)[1:N])
  x[z] <- NA
  x
}

# Functions
findword <- function(w, x) sapply(tolower(x), function(x) sum(grepl(w, strsplit(x, " ")[[1]])))



#########################################################################################
#---------------------- Functions for preparing CrowdFlower data -----------------------#
#########################################################################################

parse_into_option_dummies <- function(data, variable) {
  # Splits CrowdFlower strings with attribution responses into binary indicator variables
  data <- as.data.frame(data)
  responses <- strsplit(x = data[,variable], split = '\n')
  data[, paste0(substr(x = variable, start = 1, stop = 3), '_not_mentioned')] <- sapply(responses, function(k) ifelse("1" %in% k, 1, 0))
  data[, paste0(substr(x = variable, start = 1, stop = 3), '_putin')] <- sapply(responses, function(k) ifelse("2" %in% k, 1, 0))
  data[, paste0(substr(x = variable, start = 1, stop = 3), '_rugov')] <- sapply(responses, function(k) ifelse("3" %in% k, 1, 0))
  data[, paste0(substr(x = variable, start = 1, stop = 3), '_rubis')] <- sapply(responses, function(k) ifelse("4" %in% k, 1, 0))
  data[, paste0(substr(x = variable, start = 1, stop = 3), '_forgov')] <- sapply(responses, function(k) ifelse("5" %in% k, 1, 0))
  data[, paste0(substr(x = variable, start = 1, stop = 3), '_forbis')] <- sapply(responses, function(k) ifelse("6" %in% k, 1, 0))
  data[, paste0(substr(x = variable, start = 1, stop = 3), '_forec')] <- sapply(responses, function(k) ifelse("7" %in% k, 1, 0))
  data[, paste0(substr(x = variable, start = 1, stop = 3), '_other')] <- sapply(responses, function(k) ifelse("999" %in% k, 1, 0))
  data <- data.table(data)
  return(data)
}

make_sentiment_confidence <- function(data, reference_data) {
  # Computes confidence for sentiment scores as proportion of responses with a given sentiment score
  data$sentiment_conf <- NA
  data$sentiment_conf[data$sentiment == -1 & !is.na(data$sentiment)] <- reference_data$neg_w_prop[data$sentiment == -1 & !is.na(data$sentiment)]
  data$sentiment_conf[data$sentiment == 1 & !is.na(data$sentiment)] <- reference_data$pos_w_prop[data$sentiment == 1 & !is.na(data$sentiment)]
  data$sentiment_conf[data$sentiment == 0 & !is.na(data$sentiment)] <- reference_data$neut_w_prop[data$sentiment == 0 & !is.na(data$sentiment)]
  return(data)
}

make_option_majority_dummies <- function(data) {
  # Generates binary indicators: 1 if the majority of coders attributed a news fragment to a given category
  data$neg_putin_bin <- ifelse(data$neg_putin > 0.5, 1, 0)
  data$neg_rugov_bin <- ifelse(data$neg_rugov > 0.5, 1, 0)
  data$neg_rubis_bin <- ifelse(data$neg_rubis > 0.5, 1, 0)
  data$neg_forgov_bin <- ifelse(data$neg_forgov > 0.5, 1, 0)
  data$neg_forbis_bin <- ifelse(data$neg_forbis > 0.5, 1, 0)
  data$neg_forec_bin <- ifelse(data$neg_forec > 0.5, 1, 0)
  data$neg_other_bin <- ifelse(data$neg_other > 0.5, 1, 0)
  
  data$pos_putin_bin <- ifelse(data$pos_putin > 0.5, 1, 0)
  data$pos_rugov_bin <- ifelse(data$pos_rugov > 0.5, 1, 0)
  data$pos_rubis_bin <- ifelse(data$pos_rubis > 0.5, 1, 0)
  data$pos_forgov_bin <- ifelse(data$pos_forgov > 0.5, 1, 0)
  data$pos_forbis_bin <- ifelse(data$pos_forbis > 0.5, 1, 0)
  data$pos_forec_bin <- ifelse(data$pos_forec > 0.5, 1, 0)
  data$pos_other_bin <- ifelse(data$pos_other > 0.5, 1, 0)
  
  # merge neg_forbis and neg_forec, pos_forbis and pos_forec
  data$neg_forec_bin <- ifelse(data$neg_forbis_bin == 1, 1, data$neg_forec_bin)
  data$pos_forec_bin <- ifelse(data$pos_forbis_bin == 1, 1, data$pos_forec_bin)
  data <- subset(data, select = -c(neg_forbis, pos_forbis, neg_forbis_bin, pos_forbis_bin))
  
  return(data)
}


count_mentions <- function(string, text) {
  matcher <- gregexpr(string, tolower(text))
  found <- regmatches(text, matcher)
  num_found <- sapply(found, function(k) length(k))
  return(num_found)
}

######################################################################################
#---------------------- Functions for Figure 2 and Appendix B -----------------------#
######################################################################################

plot_coverage <- function(K, nb = FALSE) {
  
  outly <<- function(x){
    z <- (abs(scale(x)) > K)
    x[z] <- NA
    x
  }
  
  par(mfrow = c(2, 2), mar = c(3, 3, 1.5, 1), mgp = c(1.75, .6, 0))
  
  y.lim <- c(0, 1)
  y.axt <- "n"
  y.lab <- expression(bold("Predicted coverage"))
  x.lab <- expression(bold("Daily log returns (percent)"))
  
  fit <- NULL
  fit[[1]] <- gam(I(mention.rts > 0) ~ te(outly(100*rts.change)), data = econ_data, na.action = na.exclude, family = binomial(link = probit))
  fit[[2]] <- update(fit[[1]], I(mention.exc > 0) ~ te(outly(100*exc.change)))
  fit[[3]] <- update(fit[[1]], I(mention.oil > 0) ~ te(outly(100*oil.change)))
  fit[[4]] <- gam(mention.inf ~ te(outly(100*inf.change)), data = data_monthly)
  
  if(nb){
    y.lab <- expression(bold("Coverage intensity"))
    fit[[1]] <- gam(mention.rts ~ te(outly(100*rts.change)), data = econ_data, na.action = na.exclude, family = nb())
    fit[[2]] <- update(fit[[1]], mention.exc ~ te(outly(100*exc.change)), family = nb())
    fit[[3]] <- update(fit[[1]], mention.oil ~ te(outly(100*oil.change)), family = nb())
    fit[[4]] <- gam(mention.inf_count ~ te(outly(100*inf.change)), data = data_monthly, family = nb())
    y.lim <- NULL
    y.axt <- NULL
  }
  
  gam.plot <- function(fit, ylim = y.lim, trans = ifelse(nb, exp, pnorm), ylab = y.lab, xlab = x.lab, x = 0, dir = "+", ...){
    plot(fit, xlab = xlab, ylab = ylab, xaxs = "i", col.lab="black", shade = TRUE, shade.col = adjustcolor('dark blue', alpha=0.2), yaxt = y.axt, ylim = ylim, trans = trans, shift = coefficients(fit)[1], ...)
    if(!nb){
      axis(2, seq(0, 1, by = 0.2), las = 2)
      grid(NA, NULL, lty = 1, col = adjustcolor('grey', alpha = 0.3))
      text(x, 0.1, ifelse(dir == "+", "Bad news <---> Good news", "Good news <---> Bad news"))
    }
  }
  
  gam.plot(fit[[1]], main = "(1) RTS index")
  gam.plot(fit[[2]], main = "(2) RUB/USD", ylab = "", dir = "-")
  gam.plot(fit[[3]], main = "(3) Oil price")
  gam.plot(fit[[4]], trans = I, xlab = "Monthly change", main = "(4) Consumer price index", ylab = "", x = 1.6, dir = "-", font.lab=2 )
  
}


########################################################################
#---------------------- Functions for Figure 3  -----------------------#
########################################################################

my.rdplot <- function (y, x, c = 0, p = 4, nbins = NULL, binselect = "esmv", 
                       scale = NULL, kernel = "uni", weights = NULL, h = NULL, covs = NULL, 
                       support = NULL, subset = NULL, hide = FALSE, ci = NULL, shade = FALSE, 
                       par = NULL, title = NULL, x.label = NULL, y.label = NULL, 
                       x.lim = NULL, y.lim = NULL, col.dots = NULL, col.lines = NULL, 
                       type.dots = NULL, line.lwd = 3, ...) 
{
  if (!is.null(subset)) {
    x <- x[subset]
    y <- y[subset]
  }
  na.ok <- complete.cases(x) & complete.cases(y)
  if (!is.null(covs)) {
    covs = as.matrix(covs)
    dZ = ncol(covs)
    if (!is.null(subset)) 
      covs <- subset(covs, subset)
    for (i in 1:dZ) {
      na.ok <- na.ok & complete.cases(covs[, i])
    }
  }
  x <- x[na.ok]
  y <- y[na.ok]
  if (!is.null(covs)) 
    covs <- subset(covs, na.ok)
  x_min = min(x)
  x_max = max(x)
  x_l = x[x < c]
  x_r = x[x >= c]
  y_l = y[x < c]
  y_r = y[x >= c]
  if (!is.null(support)) {
    support_l = support[1]
    support_r = support[2]
    if (support_l < x_min) 
      x_min = support_l
    if (support_r > x_max) 
      x_max = support_r
  }
  range_l = c - x_min
  range_r = x_max - c
  n_l = length(x_l)
  n_r = length(x_r)
  n = n_l + n_r
  meth = "es"
  if (is.null(scale)) {
    scale = scale_l = scale_r = 1
  }
  else {
    if (length(scale) == 1) 
      scale_l = scale_r = scale
    if (length(scale) == 2) {
      scale_l = scale[1]
      scale_r = scale[2]
    }
  }
  if (!is.null(nbins)) {
    if (length(nbins) == 1) 
      nbins_l = nbins_r = nbins
    if (length(nbins) == 2) {
      nbins_l = nbins[1]
      nbins_r = nbins[2]
    }
  }
  if (is.null(h)) {
    h_l = range_l
    h_r = range_r
  }
  else {
    if (length(h) == 1) 
      h_l = h_r = h
    if (length(h) == 2) {
      h_l = h[1]
      h_r = h[2]
    }
  }
  k = 4
  flag_no_ci <- FALSE
  if (is.null(ci)) {
    ci <- 95
    flag_no_ci <- TRUE
  }
  kernel_type = "Uniform"
  if (kernel == "epanechnikov" | kernel == "epa") 
    kernel_type = "Epanechnikov"
  if (kernel == "triangular" | kernel == "tri") 
    kernel_type = "Triangular"
  exit = 0
  if (c <= x_min | c >= x_max) {
    print("c should be set within the range of x")
    exit = 1
  }
  if (kernel != "uni" & kernel != "uniform" & kernel != "tri" & 
      kernel != "triangular" & kernel != "epa" & kernel != 
      "epanechnikov" & kernel != "") {
    print("kernel incorrectly specified")
    exit = 1
  }
  if (p < 0) {
    print("p should be a positive number")
    exit = 1
  }
  if (scale <= 0 | scale_l <= 0 | scale_r <= 0) {
    print("scale should be a positive number")
    exit = 1
  }
  p_ceiling = ceiling(p)/p
  if (p_ceiling != 1 & p > 0) {
    print("p should be an integer number")
    exit = 1
  }
  if (exit > 0) 
    stop()
  R_p_l = matrix(NA, n_l, p + 1)
  R_p_r = matrix(NA, n_r, p + 1)
  for (j in 1:(p + 1)) {
    R_p_l[, j] = (x_l - c)^(j - 1)
    R_p_r[, j] = (x_r - c)^(j - 1)
  }
  W_h_l = rdrobust_kweight(x_l, c, h_l, kernel)
  W_h_r = rdrobust_kweight(x_r, c, h_r, kernel)
  n_h_l = sum(W_h_l > 0)
  n_h_r = sum(W_h_r > 0)
  if (!is.null(weights)) {
    fw_l = weights[x < c]
    fw_r = weights[x >= c]
    W_h_l = fw_l * W_h_l
    W_h_r = fw_r * W_h_r
  }
  invG_p_l = qrXXinv((sqrt(W_h_l) * R_p_l))
  invG_p_r = qrXXinv((sqrt(W_h_r) * R_p_r))
  if (is.null(covs)) {
    gamma_p1_l = qrXXinv((sqrt(W_h_l) * R_p_l)) %*% crossprod(R_p_l * 
                                                                W_h_l, y_l)
    gamma_p1_r = qrXXinv((sqrt(W_h_r) * R_p_r)) %*% crossprod(R_p_r * 
                                                                W_h_r, y_r)
  }
  else {
    z_l = covs[x < c, ]
    z_r = covs[x >= c, ]
    D_l = cbind(y_l, z_l)
    D_r = cbind(y_r, z_r)
    U_p_l = crossprod(R_p_l * W_h_l, D_l)
    U_p_r = crossprod(R_p_r * W_h_r, D_r)
    beta_p_l = invG_p_l %*% crossprod(R_p_l * W_h_l, D_l)
    beta_p_r = invG_p_r %*% crossprod(R_p_r * W_h_r, D_r)
    ZWD_p_l = crossprod(z_l * W_h_l, D_l)
    ZWD_p_r = crossprod(z_r * W_h_r, D_r)
    colsZ = 2:max(c(2 + dZ - 1, 2))
    UiGU_p_l = crossprod(U_p_l[, colsZ], invG_p_l %*% U_p_l)
    UiGU_p_r = crossprod(U_p_r[, colsZ], invG_p_r %*% U_p_r)
    ZWZ_p_l = ZWD_p_l[, colsZ] - UiGU_p_l[, colsZ]
    ZWZ_p_r = ZWD_p_r[, colsZ] - UiGU_p_r[, colsZ]
    ZWY_p_l = ZWD_p_l[, 1] - UiGU_p_l[, 1]
    ZWY_p_r = ZWD_p_r[, 1] - UiGU_p_r[, 1]
    ZWZ_p = ZWZ_p_r + ZWZ_p_l
    ZWY_p = ZWY_p_r + ZWY_p_l
    gamma_p = chol2inv(chol(ZWZ_p)) %*% ZWY_p
    s_Y = c(1, -gamma_p[, 1])
    gamma_p1_l = t(s_Y %*% t(beta_p_l))
    gamma_p1_r = t(s_Y %*% t(beta_p_r))
  }
  nplot = 500
  x_plot_l = seq(c - h_l, c, length.out = nplot)
  x_plot_r = seq(c, c + h_r, length.out = nplot)
  rplot_l = matrix(NA, nplot, p + 1)
  rplot_r = matrix(NA, nplot, p + 1)
  for (j in 1:(p + 1)) {
    rplot_l[, j] = (x_plot_l - c)^(j - 1)
    rplot_r[, j] = (x_plot_r - c)^(j - 1)
  }
  y_hat_l = rplot_l %*% gamma_p1_l
  y_hat_r = rplot_r %*% gamma_p1_r
  rk_l = matrix(NA, n_l, (k + 1))
  rk_r = matrix(NA, n_r, (k + 1))
  for (j in 1:(k + 1)) {
    rk_l[, j] = x_l^(j - 1)
    rk_r[, j] = x_r^(j - 1)
  }
  gamma_k1_l = qrXXinv(rk_l) %*% crossprod(rk_l, y_l)
  gamma_k2_l = qrXXinv(rk_l) %*% crossprod(rk_l, y_l^2)
  gamma_k1_r = qrXXinv(rk_r) %*% crossprod(rk_r, y_r)
  gamma_k2_r = qrXXinv(rk_r) %*% crossprod(rk_r, y_r^2)
  mu0_k1_l = rk_l %*% gamma_k1_l
  mu0_k1_r = rk_r %*% gamma_k1_r
  mu0_k2_l = rk_l %*% gamma_k2_l
  mu0_k2_r = rk_r %*% gamma_k2_r
  drk_l = matrix(NA, n_l, k)
  drk_r = matrix(NA, n_r, k)
  for (j in 1:k) {
    drk_l[, j] = j * x_l^(j - 1)
    drk_r[, j] = j * x_r^(j - 1)
  }
  ind_l = order(x_l)
  ind_r = order(x_r)
  x_i_l = x_l[ind_l]
  y_i_l = y_l[ind_l]
  x_i_r = x_r[ind_r]
  y_i_r = y_r[ind_r]
  dxi_l = (x_i_l[2:length(x_i_l)] - x_i_l[1:(length(x_i_l) - 
                                               1)])
  dxi_r = (x_i_r[2:length(x_i_r)] - x_i_r[1:(length(x_i_r) - 
                                               1)])
  dyi_l = (y_i_l[2:length(y_i_l)] - y_i_l[1:(length(y_i_l) - 
                                               1)])
  dyi_r = (y_i_r[2:length(y_i_r)] - y_i_r[1:(length(y_i_r) - 
                                               1)])
  x_bar_i_l = (x_i_l[2:length(x_i_l)] + x_i_l[1:(length(x_i_l) - 
                                                   1)])/2
  x_bar_i_r = (x_i_r[2:length(x_i_r)] + x_i_r[1:(length(x_i_r) - 
                                                   1)])/2
  drk_i_l = matrix(NA, n_l - 1, k)
  rk_i_l = matrix(NA, n_l - 1, (k + 1))
  drk_i_r = matrix(NA, n_r - 1, k)
  rk_i_r = matrix(NA, n_r - 1, (k + 1))
  for (j in 1:(k + 1)) {
    rk_i_l[, j] = x_bar_i_l^(j - 1)
    rk_i_r[, j] = x_bar_i_r^(j - 1)
  }
  for (j in 1:k) {
    drk_i_l[, j] = j * x_bar_i_l^(j - 1)
    drk_i_r[, j] = j * x_bar_i_r^(j - 1)
  }
  mu1_i_hat_l = drk_i_l %*% (gamma_k1_l[2:(k + 1)])
  mu1_i_hat_r = drk_i_r %*% (gamma_k1_r[2:(k + 1)])
  mu0_i_hat_l = rk_i_l %*% gamma_k1_l
  mu0_i_hat_r = rk_i_r %*% gamma_k1_r
  mu2_i_hat_l = rk_i_l %*% gamma_k2_l
  mu2_i_hat_r = rk_i_r %*% gamma_k2_r
  mu0_hat_l = rk_l %*% gamma_k1_l
  mu0_hat_r = rk_r %*% gamma_k1_r
  mu2_hat_l = rk_l %*% gamma_k2_l
  mu2_hat_r = rk_r %*% gamma_k2_r
  mu1_hat_l = drk_l %*% (gamma_k1_l[2:(k + 1)])
  mu1_hat_r = drk_r %*% (gamma_k1_r[2:(k + 1)])
  mu1_i_hat_l = drk_i_l %*% (gamma_k1_l[2:(k + 1)])
  mu1_i_hat_r = drk_i_r %*% (gamma_k1_r[2:(k + 1)])
  sigma2_hat_l_bar = mu2_i_hat_l - mu0_i_hat_l^2
  sigma2_hat_r_bar = mu2_i_hat_r - mu0_i_hat_r^2
  sigma2_hat_l = mu2_hat_l - mu0_hat_l^2
  sigma2_hat_r = mu2_hat_r - mu0_hat_r^2
  J.fun = function(B, V) {
    ceiling((((2 * B)/V) * n)^(1/3))
  }
  var_y_l = var(y_l)
  var_y_r = var(y_r)
  B_es_hat_dw = c(((c - x_min)^2/(12 * n)) * sum(mu1_hat_l^2), 
                  ((x_max - c)^2/(12 * n)) * sum(mu1_hat_r^2))
  V_es_hat_dw = c((0.5/(c - x_min)) * sum(dxi_l * dyi_l^2), 
                  (0.5/(x_max - c)) * sum(dxi_r * dyi_r^2))
  V_es_chk_dw = c((1/(c - x_min)) * sum(dxi_l * sigma2_hat_l_bar), 
                  (1/(x_max - c)) * sum(dxi_r * sigma2_hat_r_bar))
  J_es_hat_dw = J.fun(B_es_hat_dw, V_es_hat_dw)
  J_es_chk_dw = J.fun(B_es_hat_dw, V_es_chk_dw)
  B_qs_hat_dw = c((n_l^2/(24 * n)) * sum(dxi_l^2 * mu1_i_hat_l^2), 
                  (n_r^2/(24 * n)) * sum(dxi_r^2 * mu1_i_hat_r^2))
  V_qs_hat_dw = c((1/(2 * n_l)) * sum(dyi_l^2), (1/(2 * n_r)) * 
                    sum(dyi_r^2))
  V_qs_chk_dw = c((1/n_l) * sum(sigma2_hat_l), (1/n_r) * sum(sigma2_hat_r))
  J_qs_hat_dw = J.fun(B_qs_hat_dw, V_qs_hat_dw)
  J_qs_chk_dw = J.fun(B_qs_hat_dw, V_qs_chk_dw)
  J_es_hat_mv = c(ceiling((var_y_l/V_es_hat_dw[1]) * (n/log(n)^2)), 
                  ceiling((var_y_r/V_es_hat_dw[2]) * (n/log(n)^2)))
  J_es_chk_mv = c(ceiling((var_y_l/V_es_chk_dw[1]) * (n/log(n)^2)), 
                  ceiling((var_y_r/V_es_chk_dw[2]) * (n/log(n)^2)))
  J_qs_hat_mv = c(ceiling((var_y_l/V_qs_hat_dw[1]) * (n/log(n)^2)), 
                  ceiling((var_y_r/V_qs_hat_dw[2]) * (n/log(n)^2)))
  J_qs_chk_mv = c(ceiling((var_y_l/V_qs_chk_dw[1]) * (n/log(n)^2)), 
                  ceiling((var_y_r/V_qs_chk_dw[2]) * (n/log(n)^2)))
  if (binselect == "es") {
    J_star_orig = J_es_hat_dw
    meth = "es"
    binselect_type = "IMSE-optimal evenly-spaced method using spacings estimators"
    J_IMSE = J_es_hat_dw
    J_MV = J_es_hat_mv
  }
  if (binselect == "espr") {
    J_star_orig = J_es_chk_dw
    meth = "es"
    binselect_type = "IMSE-optimal evenly-spaced method using polynomial regression"
    J_IMSE = J_es_chk_dw
    J_MV = J_es_chk_mv
  }
  if (binselect == "esmv") {
    J_star_orig = J_es_hat_mv
    meth = "es"
    binselect_type = "mimicking variance evenly-spaced method using spacings estimators"
    J_IMSE = J_es_hat_dw
    J_MV = J_es_hat_mv
  }
  if (binselect == "esmvpr") {
    J_star_orig = J_es_chk_mv
    meth = "es"
    binselect_type = "mimicking variance evenly-spaced method using polynomial regression"
    J_IMSE = J_es_chk_dw
    J_MV = J_es_chk_mv
  }
  if (binselect == "qs") {
    J_star_orig = J_qs_hat_dw
    meth = "qs"
    binselect_type = "IMSE-optimal quantile-spaced method using spacings estimators"
    J_IMSE = J_qs_hat_dw
    J_MV = J_qs_hat_mv
  }
  if (binselect == "qspr") {
    J_star_orig = J_qs_chk_dw
    meth = "qs"
    binselect_type = "IMSE-optimal quantile-spaced method using polynomial regression"
    J_IMSE = J_qs_chk_dw
    J_MV = J_qs_chk_mv
  }
  if (binselect == "qsmv") {
    J_star_orig = J_qs_hat_mv
    meth = "qs"
    binselect_type = "mimicking variance quantile-spaced method using spacings estimators"
    J_IMSE = J_qs_hat_dw
    J_MV = J_qs_hat_mv
  }
  if (binselect == "qsmvpr") {
    J_star_orig = J_qs_chk_mv
    meth = "qs"
    binselect_type = "mimicking variance quantile-spaced method using polynomial regression"
    J_IMSE = J_qs_chk_dw
    J_MV = J_qs_chk_mv
  }
  J_star_l = scale_l * J_star_orig[1]
  J_star_r = scale_r * J_star_orig[2]
  if (!is.null(nbins)) {
    J_star_l = nbins_l
    J_star_r = nbins_r
    binselect_type = "manually evenly spaced"
  }
  if (var_y_l == 0) {
    J_star_l = J_star_l_orig = 1
    print("Warning: not enough variability in the outcome variable below the threshold")
  }
  if (var_y_r == 0) {
    J_star_r = J_star_r_orig = 1
    print("Warning: not enough variability in the outcome variable above the threshold")
  }
  rscale_l = J_star_l/J_IMSE[1]
  rscale_r = J_star_r/J_IMSE[2]
  bin_x_l = rep(0, length(x_l))
  bin_x_r = rep(0, length(x_r))
  jump_l = range_l/J_star_l
  jump_r = range_r/J_star_r
  if (meth == "es") {
    jumps_l = seq(x_min, c, jump_l)
    jumps_r = seq(c, x_max, jump_r)
  }
  else if (meth == "qs") {
    jumps_l = quantile(x_l, probs = seq(0, 1, 1/J_star_l))
    jumps_r = quantile(x_r, probs = seq(0, 1, 1/J_star_r))
  }
  for (k in 1:(J_star_l - 1)) bin_x_l[x_l >= jumps_l[k] & x_l < 
                                        jumps_l[k + 1]] = -J_star_l + k - 1
  bin_x_l[x_l >= jumps_l[(J_star_l)]] = -1
  for (k in 1:(J_star_r - 1)) bin_x_r[x_r >= jumps_r[k] & x_r < 
                                        jumps_r[k + 1]] = k
  bin_x_r[x_r >= jumps_r[(J_star_r)]] = J_star_r
  rdplot_mean_bin_l = rdplot_mean_x_l = rdplot_mean_y_l = rep(0, 
                                                              J_star_l)
  rdplot_mean_bin_r = rdplot_mean_x_r = rdplot_mean_y_r = rep(0, 
                                                              J_star_r)
  for (k in 1:(J_star_l)) {
    rdplot_mean_bin_l[k] = mean(c(jumps_l[k], jumps_l[k + 
                                                        1]))
    rdplot_mean_x_l[k] = mean(x_l[bin_x_l == -k])
    rdplot_mean_y_l[J_star_l - k + 1] = mean(y_l[bin_x_l == 
                                                   -k])
  }
  for (k in 1:(J_star_r)) {
    rdplot_mean_bin_r[k] = mean(c(jumps_r[k], jumps_r[k + 
                                                        1]))
    rdplot_mean_x_r[k] = mean(x_r[bin_x_r == k])
    rdplot_mean_y_r[k] = mean(y_r[bin_x_r == k])
  }
  rdplot_mean_bin_l[J_star_l] = mean(c(jumps_l[J_star_l], c))
  rdplot_mean_bin_r[J_star_r] = mean(c(jumps_r[J_star_r], x_max))
  bin_x = c(bin_x_l, bin_x_r)
  rdplot_mean_bin = c(rdplot_mean_bin_l, rdplot_mean_bin_r)
  rdplot_mean_x = c(rdplot_mean_x_l, rdplot_mean_x_r)
  rdplot_mean_y = c(rdplot_mean_y_l, rdplot_mean_y_r)
  rdplot_sd_y_l = rdplot_N_l = rdplot_sd_y_r = rdplot_N_r = 0
  for (j in 1:(J_star_l)) {
    rdplot_sd_y_l[j] = sd(y_l[bin_x_l == -j])
    rdplot_N_l[j] = length(y_l[bin_x_l == -j])
  }
  for (j in 1:(J_star_r)) {
    rdplot_sd_y_r[j] = sd(y_r[bin_x_r == j])
    rdplot_N_r[j] = length(y_r[bin_x_r == j])
  }
  rdplot_sd_y_l[is.na(rdplot_sd_y_l)] = 0
  rdplot_sd_y_r[is.na(rdplot_sd_y_r)] = 0
  rdplot_sd_y = c(rev(rdplot_sd_y_l), rdplot_sd_y_r)
  rdplot_N = c(rev(rdplot_N_l), rdplot_N_r)
  quant = -qt((1 - (ci/100))/2, max(rdplot_N - 1, 1))
  rdplot_se_y <- rdplot_sd_y/sqrt(rdplot_N)
  rdplot_cil_bin = rdplot_mean_y - quant * rdplot_se_y
  rdplot_cir_bin = rdplot_mean_y + quant * rdplot_se_y
  if (hide == "FALSE") {
    if (is.null(col.lines)) 
      col.lines = "blue"
    if (is.null(col.dots)) 
      col.dots = 1
    if (is.null(type.dots)) 
      type.dots = 20
    if (is.null(title)) 
      title = "RD Plot"
    if (is.null(x.label)) 
      x.label = "X axis"
    if (is.null(y.label)) 
      y.label = "Y axis"
    par = par
    if (flag_no_ci == TRUE) {
      plot(rdplot_mean_bin, rdplot_mean_y, main = title, 
           xlab = x.label, ylab = y.label, ylim = y.lim, 
           xlim = x.lim, col = col.dots, pch = type.dots, 
           ...)
      lines(x_plot_l, y_hat_l, type = "l", col = col.lines, lwd = line.lwd)
      lines(x_plot_r, y_hat_r, type = "l", col = col.lines, lwd = line.lwd)
      segments(0, 0, 0, 0.5, lty = 3, col = "black", lwd = 1.5)
    }
    else {
      if (shade == TRUE) {
        plot(rdplot_mean_bin, rdplot_mean_y, main = title, 
             xlab = x.label, ylab = y.label, ylim = y.lim, 
             xlim = x.lim, col = col.dots, pch = type.dots, 
             ...)
        polygon(c(rev(rdplot_mean_bin), rdplot_mean_bin), 
                c(rev(rdplot_cil_bin), rdplot_cir_bin), col = "grey75")
        lines(x_plot_l, y_hat_l, type = "l", col = col.lines, lwd = line.lwd)
        lines(x_plot_r, y_hat_r, type = "l", col = col.lines, lwd = line.lwd)
        abline(v = c)
      }
      else {
        plot(rdplot_mean_bin, rdplot_mean_y, main = title, 
             xlab = x.label, ylab = y.label, ylim = y.lim, 
             xlim = x.lim, col = col.dots, pch = type.dots, 
             ...)
        arrows(rdplot_mean_bin, rdplot_cil_bin, rdplot_mean_bin, 
               rdplot_cir_bin, code = 3, length = 0.1, angle = 90, 
               col = "grey")
        lines(x_plot_l, y_hat_l, type = "l", col = col.lines, lwd = line.lwd)
        lines(x_plot_r, y_hat_r, type = "l", col = col.lines, lwd = line.lwd)
        abline(v = c)
      }
    }
  }
  cutoffs = c(jumps_l, jumps_r[2:length(jumps_r)])
  rdplot_min_bin = cutoffs[1:(length(cutoffs) - 1)]
  rdplot_max_bin = cutoffs[2:length(cutoffs)]
  bin_length = rdplot_max_bin - rdplot_min_bin
  bin_avg_l = mean(bin_length[1:J_star_l])
  bin_med_l = median(bin_length[1:J_star_l])
  bin_avg_r = mean(bin_length[(J_star_l + 1):length(bin_length)])
  bin_med_r = median(bin_length[(J_star_l + 1):length(bin_length)])
  vars = data.frame(rdplot_mean_bin = rdplot_mean_bin, rdplot_mean_x = rdplot_mean_x, 
                    rdplot_mean_y = rdplot_mean_y, rdplot_min_bin = rdplot_min_bin, 
                    rdplot_max_bin = rdplot_max_bin, rdplot_se_y = rdplot_se_y, 
                    rdplot_N = rdplot_N, rdplot_ci_l = rdplot_cil_bin, rdplot_ci_r = rdplot_cir_bin)
  coef = cbind(gamma_p1_l, gamma_p1_r)
  colnames(coef) = c("Left", "Right")
  out = list(coef = coef, genvars = vars, J = c(J_star_l, J_star_r), 
             J_IMSE = J_IMSE, J_MV = J_MV, scale = c(scale_l, scale_r), 
             rscale = c(rscale_l, rscale_r), bin_avg = c(bin_avg_l, 
                                                         bin_avg_r), bin_med = c(bin_med_l, bin_med_r), p = p, 
             c = c, h = c(h_l, h_r), N = c(n_l, n_r), Nh = c(n_h_l, 
                                                             n_h_r), binselect = binselect_type, kernel = kernel_type)
  out$call <- match.call()
  class(out) <- "rdplot"
  return(invisible(out))
}

qrXXinv = function(x, ...) {
  #tcrossprod(solve(qr.R(qr(x, tol = 1e-10)), tol = 1e-10))
  #tcrossprod(solve(qr.R(qr(x))))
  chol2inv(chol(crossprod(x)))
}

qrreg = function(x,y,w,s2=0,var.comp=TRUE, ...) {
  M.X = sqrt(w)*x
  X.M.X_inv = qrXXinv(M.X) 
  X.M.Y = crossprod(M.X,sqrt(w)*y)
  beta.hat = X.M.X_inv%*%X.M.Y
  Psi.hat=Sigma.hat=0
  if (var.comp==TRUE) {
    Psi.hat = crossprod((w*s2*w)*x,x)
    Sigma.hat = crossprod(Psi.hat%*%X.M.X_inv,X.M.X_inv)
  }
  output = list(X.M.X_inv=X.M.X_inv, X.M.Y=X.M.Y, beta.hat=beta.hat, Psi.hat=Psi.hat, Sigma.hat=Sigma.hat)
  return(output)
}

rdrobust_kweight = function(X, c,  h,  kernel){
  u = (X-c)/h
  if (kernel=="epanechnikov" | kernel=="epa") {
    w = (0.75*(1-u^2)*(abs(u)<=1))/h
  }
  else if (kernel=="uniform" | kernel=="uni") {
    w = (0.5*(abs(u)<=1))/h
  }
  else {
    w = ((1-abs(u))*(abs(u)<=1))/h
  }
  return(w)	
}

rdrobust_res = function(X, y, T, Z, m, hii, vce, matches, dups, dupsid, d) {
  n = length(y)
  dT=dZ=0
  if (!is.null(T)) dT = 1
  if (!is.null(Z)) dZ = ncol(Z)
  res = matrix(NA,n,1+dT+dZ)  	
  
  if (vce=="nn") {
    for (pos in 1:n) {
      rpos = dups[pos] - dupsid[pos]
      lpos = dupsid[pos] - 1
      while (lpos+rpos < min(c(matches,n-1))) {
        if (pos-lpos-1 <= 0) rpos = rpos + dups[pos+rpos+1]
        else if (pos+rpos+1>n) lpos = lpos + dups[pos-lpos-1]
        else if ((X[pos]-X[pos-lpos-1]) > (X[pos+rpos+1]-X[pos])) rpos = rpos + dups[pos+rpos+1]
        else if ((X[pos]-X[pos-lpos-1]) < (X[pos+rpos+1]-X[pos])) lpos = lpos + dups[pos-lpos-1]
        else {
          rpos = rpos + dups[pos+rpos+1]
          lpos = lpos + dups[pos-lpos-1]
        }
      }
      ind_J = max(c(0,(pos-lpos))):min(c(n,(pos+rpos)))
      y_J   = sum(y[ind_J])-y[pos]
      Ji = length(ind_J)-1
      res[pos,1] = sqrt(Ji/(Ji+1))*(y[pos] - y_J/Ji)
      if (!is.null(T)) {
        T_J = sum(T[ind_J])-T[pos]
        res[pos,2] = sqrt(Ji/(Ji+1))*(T[pos] - T_J/Ji)
      }
      if (!is.null(Z)) {
        for (i in 1:dZ) {
          Z_J = sum(Z[ind_J,i])-Z[pos,i]
          res[pos,1+dT+i] = sqrt(Ji/(Ji+1))*(Z[pos,i] - Z_J/Ji)
        }
      }
    }		
  }
  else {
    if (vce=="hc0") w = 1
    else if (vce=="hc1") w = sqrt(n/(n-d))
    else if (vce=="hc2") w = sqrt(1/(1-hii))
    else                 w =      1/(1-hii)
    res[,1] = w*(y-m[,1])
    if (dT==1) res[,2] = w*(T-m[,2])
    if (dZ>0) {
      for (i in 1:dZ) {
        res[,1+dT+i] = w*(Z[,i]-m[,1+dT+i])
      }
    }
  }
  return(res)
}

rdrobust_bw = function(Y, X, T, Z, C, W, c, o, nu, o_B, h_V, h_B, scale, vce, nnmatch, kernel, dups, dupsid){
  dT = dZ = dC = eC = 0
  w = rdrobust_kweight(X, c, h_V, kernel)
  dW = length(W)
  if (dW>1) {
    w = W*w
  }
  
  ind_V = w> 0; eY = Y[ind_V];eX = X[ind_V];eW = w[ind_V]
  n_V = sum(ind_V)
  D_V = eY
  R_V = matrix(NA,n_V,o+1)
  for (j in 1:(o+1)) R_V[,j] = (eX-c)^(j-1)
  invG_V = qrXXinv(R_V*sqrt(eW))
  e_v = matrix(0,(o+1),1); e_v[nu+1]=1
  s = 1
  eT=eC=eZ=NULL
  if (!is.null(T)) {
    dT = 1
    eT = T[ind_V]
    D_V = cbind(D_V,eT)
  }
  if (!is.null(Z)) {
    dZ = ncol(Z)
    eZ = Z[ind_V,,drop=FALSE]
    D_V = cbind(D_V,eZ)
    U = crossprod(R_V*eW,D_V)
    ZWD  = crossprod(eZ*eW,D_V)
    colsZ = (2+dT):max(c(2+dT+dZ-1,(2+dT)))
    UiGU =  crossprod(matrix(U[,colsZ],nrow=o+1),invG_V%*%U) 
    ZWZ = ZWD[,colsZ] - UiGU[,colsZ] 
    ZWY = ZWD[,1:(1+dT)] - UiGU[,1:(1+dT)] 
    gamma = chol2inv(chol(ZWZ))%*%ZWY
    s = c(1 , -gamma[,1])
  }
  if (!is.null(C)) {
    dC = 1
    eC =  C[ind_V] 
  }
  beta_V = invG_V%*%crossprod(R_V*eW,D_V)	
  if (is.null(Z) & !is.null(T)) {	
    tau_Y = factorial(nu)*beta_V[nu+1,1]
    tau_T = factorial(nu)*beta_V[nu+1,2]
    s = c(1/tau_T , -(tau_Y/tau_T^2))
  }
  if (!is.null(Z) & !is.null(T)) {	
    s_T = c(1 , -gamma[,2])
    tau_Y = factorial(nu)*t(s)%*%  c(beta_V[nu+1,1],beta_V[nu+1,colsZ])
    tau_T = factorial(nu)*t(s_T)%*%c(beta_V[nu+1,2],beta_V[nu+1,colsZ])
    s = c(1/tau_T , -(tau_Y/tau_T^2) , -(1/tau_T)*gamma[,1] + (tau_Y/tau_T^2)*gamma[,2])
  }	
  dups_V=dupsid_V=predicts_V=0
  
  if (vce=="nn") {
    dups_V   = dups[ind_V]
    dupsid_V = dupsid[ind_V]
  }
  
  if (vce=="hc0" | vce=="hc1" | vce=="hc2" | vce=="hc3") {
    predicts_V=R_V%*%beta_V
    if (vce=="hc2" | vce=="hc3") {
      hii=matrix(NA,n_V,1)	
      for (i in 1:n_V) {
        hii[i] = R_V[i,]%*%invG_V%*%(R_V*eW)[i,]
      }
    }
  }	
  res_V = rdrobust_res(eX, eY, eT, eZ, predicts_V, hii, vce, nnmatch, dups_V, dupsid_V, o+1)
  V_V = (invG_V%*%rdrobust_vce(dT+dZ, s, R_V*eW, res_V, eC)%*%invG_V)[nu+1,nu+1]
  v = crossprod(R_V*eW,((eX-c)/h_V)^(o+1))
  Hp = 0
  for (j in 1:(o+1)) Hp[j] = h_V^((j-1))
  BConst = (Hp*(invG_V%*%v))[nu+1]
  
  w = rdrobust_kweight(X, c, h_B, kernel)
  if (dW>1) {
    w = W*w
  }
  ind = w> 0 
  n_B = sum(ind)
  eY = Y[ind];eX = X[ind];eW = w[ind]
  D_B = eY
  R_B = matrix(NA,n_B,o_B+1)
  for (j in 1:(o_B+1)) R_B[,j] = (eX-c)^(j-1)
  invG_B = qrXXinv(R_B*sqrt(eW))
  eT=eC=eZ=NULL
  if (!is.null(T)) {
    eT = T[ind]
    D_B = cbind(D_B,eT)
  }
  if (!is.null(Z)) {
    eZ = Z[ind,,drop=FALSE]
    D_B = cbind(D_B,eZ)
  }
  if (!is.null(C)) {
    eC=C[ind]
  }	
  beta_B = invG_B%*%crossprod(R_B*eW,D_B)	
  BWreg=0
  if (scale>0) {
    e_B = matrix(0,(o_B+1),1); e_B[o+2]=1
    dups_B=dupsid_B=hii=predicts_B=0
    if (vce=="nn") {
      dups_B   = dups[ind]
      dupsid_B = dupsid[ind]
    }
    if (vce=="hc0" | vce=="hc1" | vce=="hc2" | vce=="hc3") {
      predicts_B=R_B%*%beta_B
      if (vce=="hc2" | vce=="hc3") {
        hii=matrix(NA,n_B,1)	
        for (i in 1:n_B) {
          hii[i] = R_B[i,]%*%invG_B%*%(R_B*eW)[i,]
        }
      }
    }	
    res_B = rdrobust_res(eX, eY, eT, eZ, predicts_B, hii, vce, nnmatch, dups_B, dupsid_B,o_B+1)
    V_B = (invG_B%*%rdrobust_vce(dT+dZ, s, R_B*eW, res_B, eC)%*%invG_B)[o+2,o+2]
    BWreg = 3*BConst^2*V_B
  }
  B =  sqrt(2*(o+1-nu))*BConst%*%(t(s)%*%(beta_B[o+2,]))
  V = (2*nu+1)*h_V^(2*nu+1)*V_V
  R = scale*(2*(o+1-nu))*BWreg
  rate = 1/(2*o+3)
  output = list(V=V,B=B,R=R,rate=rate)
  return(output)
}

rdrobust_vce = function(d, s, RX, res, C) {	
  k = ncol(as.matrix(RX))
  M = matrix(0,k,k)
  n  = length(C)
  if (is.null(C)) {
    w = 1
    if (d==0){
      M  = crossprod(c(res)*RX)
    }
    else {
      for (i in 1:(1+d)) {
        SS = res[,i]*res
        for (j in 1:(1+d)) {
          M = M + crossprod(RX*(s[i]*s[j])*SS[,j],RX)
        }
      }
    }
  }
  else {	
    clusters = unique(C)
    g     = length(clusters)
    w=((n-1)/(n-k))*(g/(g-1))
    if (d==0){
      for (i in 1:g) {
        ind=C==clusters[i]
        Xi = RX[ind,,drop=FALSE]
        ri = res[ind,,drop=FALSE]
        M = M + crossprod(t(crossprod(Xi,ri)),t(crossprod(Xi,ri)))
      }
    }
    else {
      for (i in 1:g) {
        ind=C==clusters[i]
        Xi = RX[ind,,drop=FALSE]
        ri = res[ind,,drop=FALSE]
        for (l in 1:(1+d)) {	
          for (j in 1:(1+d)) {
            M = M + crossprod(t(crossprod(Xi,s[l]*ri[,l])),t(crossprod(Xi,s[j]*ri[,j])))
          }	
        }					
      }
    }
  }
  return(w*M)		
}

bwconst = function(p,v,kernel){
  if (kernel=="epanechnikov" | kernel=="epa" | kernel==3) {
    K.fun = function(u) {(0.75*(1-u^2)*(abs(u)<=1))}
  }
  else if (kernel=="uniform" | kernel=="uni" | kernel==2) {
    K.fun = function(u) {(0.5*(abs(u)<=1))}
  }
  else  {
    K.fun = function(u) {((1-abs(u))*(abs(u)<=1))}
  }
  p1 = p+1  
  Gamma_p = Phi_p = matrix(NA,p1,p1)
  Omega_pq = matrix(NA,p1,1)
  for (i in 1:p1) {
    Omega.fun = function(u) {K.fun(u)*(u^(p1))*(u^(i-1))}
    Omega_pq[i] = integrate(Omega.fun,lower=0,upper=1)$value
    for (j in 1:p1) {
      Gamma.fun = function(u) {K.fun(u)*(u^(i-1))*(u^(j-1))}
      Phi.fun   = function(u) {(K.fun(u)^2)*(u^(i-1))*(u^(j-1))}
      Gamma_p[i,j] = integrate(Gamma.fun,lower=0,upper=1)$value
      Phi_p[i,j] = integrate(Phi.fun,lower=0,upper=1)$value
    }
  }
  B_const = solve(Gamma_p)%*%Omega_pq
  V_const = solve(Gamma_p)%*%Phi_p%*%solve(Gamma_p)
  C1 = B_const[v+1,1]
  C2 = V_const[v+1,v+1]
  return(c(C1,C2))
}

rdvce= function(X,y,frd=NULL,p,h,matches,vce,kernel){
  m = matches+1
  n = length(X)
  p1 = p+1
  sigma = matrix(0,n,1)
  if (vce=="resid") {
    for (k in 1:n) {
      cutoff = matrix(X[k],n,1)
      cutoff1 = X[k]
      W = rdrobust_kweight(X,cutoff1,h,"kernel")
      ind=W>0
      if (sum(ind)>5) {
        w.p=W[ind]; X.p=X[ind]; y.p=y[ind]
        XX.p = matrix(c((X.p-cutoff1)^0, poly(X.p-cutoff1,degree=p,raw=T)),length(X.p),p+1)
        mu0_phat_y = qr.coef(qr(XX.p*sqrt(w.p), tol = 1e-10), sqrt(w.p)*y.p)[1]
        if (is.null(frd)) {
          sigma[k] = (y[k] - mu0_phat_y)^2
        }
        else if (!is.null(frd)) {
          z.p=frd[ind]
          out=qrreg(XX.p, z.p, w.p, var.comp=FALSE) 
          mu0_phat_z = out$beta.hat[1]
          sigma[k] = (y[k] - mu0_phat_y)*(frd[k] - mu0_phat_z)
        }
      }
    }
  }
  else  {
    #y_match_avg = z_match_avg = matrix(NA,n,1)
    for (k in 1:n) {
      diffx = abs(X - X[k])
      m.group = sort(unique(diffx))[2:m]
      ind = which(diffx %in% m.group)
      y_match_avg = z_match_avg = mean(y[ind])
      Ji = length(ind)
      if (is.null(frd)) {
        sigma[k] = (Ji/(Ji+1))*(y[k] - y_match_avg)^2
      } 
      else if (!is.null(frd)) {
        z_match_avg = mean(frd[ind])
        sigma[k] = (Ji/(Ji+1))*(y[k] - y_match_avg)*(frd[k] - z_match_avg)
      }
    }
  }
  return(sigma)
}

regconst = function(d,h){
  d2 = 2*d+1
  d1 = d+1
  mu = matrix(0,d2, 1)
  mu[1] = 1
  XX = matrix(0,d1,d1)
  for (j in 2:d2) {
    i = j-1
    if (j%%2==1) {
      mu[j] = (1/(i+1))*(h/2)^i
    }
  }
  for (j in 1:d1) {
    XX[j,] = t(mu[j:(j+d)])
  }
  invXX =solve(XX)
  return(invXX)
}

coarse.val <- function(x, n = 10) {
  y <- cut(x, seq(n/2, max(x, na.rm=TRUE), by = n), lower = TRUE)
  x - n*as.numeric(y)
}

plot_rdd <- function(y = 0.01, dot.col = adjustcolor('dark blue', alpha = 0.2), lwd = 2.5, line.lwd = 3, g = function(x) 1*(x > 0), place.est = 0.55){
  
  p <- function(rd){
    paste("a = ", roundr(rd$Estimate[2], 2), " (", roundr(rd$Estimate[4], 2),")", sep = "")
  }
  
  
  par(mfrow = c(1, 3), mar = c(3.25, 3.25, 1, 0), mgp = c(2, 1/2, 0), oma = c(0, 0, 0, 0))
  Y <- g(econ_data$mention.rts)
  x <- coarse.val(econ_data$rts.value, n = 100)
  rd <- rdrobust(y = Y, x = x)
  my.rdplot(y = Y, x = x, y.lim = c(0,.6), col.dots=dot.col, col.lines="black", xaxt = "n", las = 2, title = "RTS Index", x.lab = "Relative to 100, 200, 300, ...", bty = "n", y.lab = "Coverage intensity", cex.lab = 1.25, lwd = lwd, line.lwd=line.lwd)
  axis(1, at = seq(-50, 50, by = 25))
  arrows(-30, y, -45, y, angle = 10, length=0.1)
  text(-15, y, "Worse", cex = 1.25)
  arrows(30, y, 45, y, angle = 10, length=0.1)
  text(15, y, "Better", cex = 1.25)
  text(0, place.est, p(rd), cex = 1.25)
  
  par(mar = c(3.25, 0, 1, 0), mgp = c(2, 1/2, 0))
  Y <- g(econ_data$mention.exc)
  x <- coarse.val(econ_data$exc.value, n = 5)
  rd <- rdrobust(y = Y, x = x)
  my.rdplot(y = Y, x = x, y.lim = c(0,0.6), col.dots=dot.col, col.lines="black", xaxt = "n", las = 2, title = "RUB/USD", x.lab = "Relative to 5, 10, 15, ...", , yaxt = "n", y.lab = "", bty = "n", cex.lab = 1.25, lwd = lwd, line.lwd=line.lwd)
  axis(1, at = seq(-2, 2, by = 1))
  arrows(-1.2, y, -2, y, angle = 10, length=0.1)
  text(-.6, y, "Better", cex = 1.25)
  arrows(1.2, y, 2, y, angle = 10, length=0.1)
  text(.6, y, "Worse", cex = 1.25)
  text(0, place.est, p(rd), cex = 1.25)
  
  par(mar = c(3.25, 0, 1, 0), mgp = c(2, 1/2, 0))
  Y <- g(econ_data$mention.oil)
  x <- coarse.val(econ_data$oil.value, n = 5)
  rd <- rdrobust(y = Y, x = x)
  my.rdplot(y = Y, x = x, y.lim = c(0,0.6), col.dots=dot.col, col.lines="black", xaxt = "n", las = 2, title = "Oil Price", x.lab = "Relative to 5, 10, 15, ...", y.lab = "", bty = "n", yaxt = "n", cex.lab = 1.2, lwd = lwd, line.lwd=line.lwd)
  axis(1, at = seq(-2, 2, by = 1))
  arrows(-1.2, y, -2, y, angle = 10, length=0.1)
  text(-.6, y, "Worse", cex = 1.25)
  arrows(1.2, y, 2, y, angle = 10, length=0.1)
  text(.6, y, "Better", cex = 1.25)
  text(0, place.est, p(rd), cex = 1.25)
  
}



########################################################################
#---------------------- Functions for Figure 5  -----------------------#
########################################################################

ame <- function(fit){
  dat <<- fit$data
  X <- function(z) model.matrix(fit, data = mutate(dat, neg_outcome = z))
  fit <- update(fit, data = dat)
  b <- heavy::rmnorm(1000, coefficients(fit), multiwayvcov::cluster.vcov(fit, ~ time))
  out <- apply(pnorm(X(0)%*%t(b)) - pnorm(X(1)%*%t(b)), 2, mean)
  c(mean(out), coda::HPDinterval(coda::as.mcmc(out), prob = 0.95))
}


########################################################################
#---------------------- Functions for Figure 6  -----------------------#
########################################################################

grid_arrange_shared_legend <- function(..., ncol = length(list(...)), nrow = 1, position = c("bottom", "right")) {
  # Source: https://github.com/tidyverse/ggplot2/wiki/Share-a-legend-between-two-ggplot2-graphs
  plots <- list(...)
  position <- match.arg(position)
  g <- ggplotGrob(plots[[1]] + theme(legend.position = position))$grobs
  legend <- g[[which(sapply(g, function(x) x$name) == "guide-box")]]
  lheight <- sum(legend$height)
  lwidth <- sum(legend$width)
  gl <- lapply(plots, function(x) x + theme(legend.position="none"))
  gl <- c(gl, ncol = ncol, nrow = nrow)
  
  combined <- switch(position,
                     "bottom" = arrangeGrob(do.call(arrangeGrob, gl),
                                            legend,
                                            ncol = 1,
                                            heights = unit.c(unit(1, "npc") - lheight, lheight)),
                     "right" = arrangeGrob(do.call(arrangeGrob, gl),
                                           legend,
                                           ncol = 2,
                                           widths = unit.c(unit(1, "npc") - lwidth, lwidth)))
  
  grid.newpage()
  grid.draw(combined)
  
  # return gtable invisibly
  invisible(combined)
  
}


pout_het <- function(fit, ...){
  
  p <- function(z){
    x <- seq(min(ymd('1999-01-01')), max(ymd('2016-07-22')), by = "week")
    X <- predict(fit, newdata = data.frame(neg_outcome = z, date = x), type = "lpmatrix")
    b <- heavy::rmnorm(1000, coef(fit), vcov(fit))
    y <- pnorm(X%*%t(b))
    y <- t(apply(y, 1, function(x) c(mean(x), coda::HPDinterval(coda::as.mcmc(x), prob = 0.95))))
    colnames(y) <- c("m", "lci", "uci")
    y <- data.frame(var = as.character(formula(fit)[[2]]), z = 1-z, date = x, y)
    y
  }
  
  do.call("rbind", lapply(c(0, 1), p))
  
}

het_plot <- function(p, gg_title, Putin = F, sanctions = F, protest = F, yax = T){
  
  p <- pout_het(p)
  p$labs[p$z==0] <- "Good news"
  p$labs[p$z==1] <- "Bad news"
  labs <- c("Good news  ", "Bad news  ")
  
  gg <- ggplot(p, aes(x=date, y=m, colour=labs, fill = labs, linetype = labs)) + 
    geom_line(size = 1.15) +
    geom_vline(xintercept = as.numeric(elections[c(1,3,5,7)]), lty = 2, col = "grey40", size = 1.02) +  
    geom_vline(xintercept = as.numeric(elections[c(2,4,6,8)]), lty = 3, col = "grey40", size = 1.02) + 
    
    geom_ribbon(aes(ymin=lci, ymax=uci, alpha = rev(factor(z))), colour=NA) + 
    theme_bw()  + xlab("") + 
    scale_color_manual("legend", values=c("dark blue", "dark red"), labels = labs) + 
    scale_fill_manual("legend", values=c("dark blue", "dark red"), labels = labs) + 
    scale_linetype_manual("legend", values=rev(c("solid", "solid")), labels = labs) + 
    theme(
      axis.title = element_text(size=12,face="bold"),
      axis.title.x = element_text(vjust=-.75, margin=margin(0,0,0,0)),
      axis.text.x = element_text(size=10, face = "bold", colour = "black"),
      axis.text.y = element_text(size=12),
      plot.title = element_text(hjust=0.5, face="bold"),
      legend.position = "top",
      legend.key = element_rect(colour = "white"),
      legend.text = element_text(size = 12),
      legend.title=element_blank(),
      legend.key.height = unit(1, "cm"),
      panel.grid.minor.y = element_blank(),
      panel.grid.minor.x = element_blank(),
      panel.grid.major.x = element_blank(), 
      panel.grid.major.y = element_blank()
    ) + theme(strip.text.x = element_text(size = 12, face = "bold"), strip.background = element_blank()) + 
    ggtitle(gg_title)
  
  
  
  gg <- gg + scale_alpha_manual("legend", values = c("0" = 0.2, "1" = .5), labels = c("Good news  ", "Bad news  ")) + 
    scale_x_date(label = c('2000', '2004', '2008', '2012', '2016'), 
                 breaks = c(ymd('2000-01-01'), ymd('2004-01-01'), ymd('2008-01-01'), ymd('2012-01-01'), ymd('2016-01-01') )) + 
    ylim(c(0, .85))
  
  if (yax) {
    gg <- gg + ylab("Probability of attribution")
  } else {
    gg <- gg + ylab("")
  }
  
  if (Putin) {
    gg <- gg +
      annotate('label', x = ymd('2006-09-01'), y = 0.8, label = 'Putin not running') +
      geom_segment(aes(x=ymd('2006-09-01'), xend=ymd('2008-02-28'), y=0.765, yend=0.70), size = 0.5,
                   arrow = arrow(length = unit(0.2, "cm")), show.legend = F, color='black')  
  } 
  
  if (sanctions) {
    gg <- gg +
      annotate('label', x = ymd('2014-12-22'), y = 0.78, label = 'Sanctions') +
      geom_segment(aes(x=ymd('2014-03-16'), xend=ymd('2014-03-16'), y=0.745, yend=0.5), size = 0.5,
                   arrow = arrow(length = unit(0.2, "cm")), show.legend = F, color='black') +
      geom_segment(aes(x=ymd('2015-07-30'), xend=ymd('2015-07-30'), y=0.745, yend=0.5), size = 0.5,
                   arrow = arrow(length = unit(0.2, "cm")), show.legend = F, color='black')
  }
  
  if (protest) {
    gg <- gg +
      annotate('label', x = ymd('2012-02-28'), y = 0.78, label = 'Protests') +
      geom_segment(aes(x=ymd('2011-12-04'), xend=ymd('2011-12-04'), y=0.745, yend=0.6), size = 0.5,
                   arrow = arrow(length = unit(0.2, "cm")), show.legend = F, color='black') +
      geom_segment(aes(x=ymd('2012-05-06'), xend=ymd('2012-05-06'), y=0.745, yend=0.6), size = 0.5,
                   arrow = arrow(length = unit(0.2, "cm")), show.legend = F, color='black')
  }
  
  gg
  
}



het_plot_show <- function(fit){
  p <- list(
    update(fit, putin_bin ~ .),
    update(fit, rugov_bin ~ .),
    update(fit, forec_bin ~ .),
    update(fit, forgov_bin ~ .)
  )
  
  pu <- het_plot(p[[1]], gg_title = "Vladimir Putin", Putin = T)
  ru <- het_plot(p[[2]], gg_title = "Russian officials")
  fe <- het_plot(p[[3]], gg_title = "Foreign economy", protest = T, yax = F)
  fp <- het_plot(p[[4]], gg_title = "Foreign powers", sanctions = T, yax = F)
  
  grid_arrange_shared_legend(pu, fe, ru, fp, ncol = 2, nrow = 2)
}


het_assoc_plot_show <- function(fit){
  p <- list(
    update(fit, mention_bin_putin ~ .),
    update(fit,  mention_bin_rugov ~ .),
    update(fit,  mention_bin_forec ~ .),
    update(fit, mention_bin_forgov ~ .)
  )
  
  pu <- het_plot(p[[1]], gg_title = "Vladimir Putin", Putin = T)
  ru <- het_plot(p[[2]], gg_title = "Russian officials")
  fe <- het_plot(p[[3]], gg_title = "Foreign economy", protest = T, yax = F)
  fp <- het_plot(p[[4]], gg_title = "Foreign powers", sanctions = T, yax = F)
  
  grid_arrange_shared_legend(pu, fe, ru, fp, ncol = 2, nrow = 2)
}



########################################################################
#---------------------- Functions for Figure 7  -----------------------#
########################################################################

ame7 <- function(fit){
  dat <<- fit$data
  X <- function(z, w) model.matrix(fit, data = mutate(dat, neg_outcome = z, x = w))
  fit <- update(fit, data = dat)
  b <- heavy::rmnorm(10000, coefficients(fit), multiwayvcov::cluster.vcov(fit, ~ year_month))
  g <- function(x){
    out <- apply(pnorm(X(0, x)%*%t(b)) - pnorm(X(1, x)%*%t(b)), 2, mean)
  }
  out <- sapply(0:1, g)
  p <- mean(out[, 1] > out[,2])
  out <- t(apply(out, 2, function(x) c(mean(x), coda::HPDinterval(coda::as.mcmc(x), prob = 0.95))))
  data.frame(outcome = as.character(formula(fit)[[2]]), x = 0:1, out, p = rep(p, 2))
}


approval_out <- function(fit, labs = c("Putin's approval low~~~~~~", "Putin's approval high~~~~~~")){
  
  out <- do.call("rbind", lapply(fit, ame7))
  categories <- c("Vladimir \n Putin", "Russian \n officials", "Foreign \n economy", "Foreign \n governments")
  
  gg <- ggplot(data = out) + geom_pointrange(aes(x = outcome, y = X1, ymin = X2, ymax = X3, colour = factor(x), shape = factor(x)), position = position_dodge(width = .4), size = 1.25) + theme_bw() + ylab("Relative attribution") + xlab("") + scale_colour_manual(labels=labs, values = c("black", "grey50")) + scale_shape_manual(labels=labs, values = c(16:(15+2))) +
    theme(
      axis.title = element_text(size=12,face="bold"),
      axis.title.x = element_text(vjust=-.75, margin=margin(0,0,0,0)),
      axis.text.x = element_text(size=12, face = "bold", colour = "black"),
      axis.text.y = element_text(size=12),
      legend.position = "top",
      legend.key = element_rect(colour = "white"),
      legend.text = element_text(size = 12, hjust = 0),
      legend.title=element_blank(),
      legend.spacing.x = unit(.1,"cm"),
      legend.key.height = unit(1, "cm"),
      panel.grid.major.x = element_blank()
    ) + geom_hline(aes(yintercept=0), linetype = "dotted") + scale_x_discrete(labels = categories)
  
  p <- unique(out$p)
  p[3:4] <- 1-p[3:4]
  gg <- gg + annotate("text", x = c(1:4), y = c(-.4), label = paste("p=", roundr(p, 2), "", sep = ""))
  
  gg
}



###################################################################################
#---------------------- Functions for Appendix B3 Table 2  -----------------------#
###################################################################################

outtex_b3tab2 <- function(fit) stargazer(fit, omit = c("Constant"), digits=2, dep.var.caption = NULL, style = "apsr", dep.var.labels = "", dep.var.labels.include = FALSE,  no.space = FALSE, digits.extra = 0, notes = "{\\footnotesize Significance levels: $^{***}$p $<$ .001; $^{**}$p $<$ .01; $^{*}$p $<$ .05}", notes.append = FALSE, keep.stat = c("ll", "n"), title = "Probit regression coefficients with standard errors in parentheses. The independent variable in each specification is the z-score of a given economic indicator.", table.placement="h!", label="tab:report_sentiment", font.size = "small", star.cutoffs = rev(c(0.001, 0.01, 0.05)), column.labels = paste("\\multicolumn{1}{C{2cm}}{", c("RTS index", "Oil price", "RUB/USD exchange", "CPI"), "}"), align = TRUE)




####################################################################################
#---------------------- Functions for Appendix B3 Figure 5  -----------------------#
####################################################################################

plot_distortion <- function(fit) {
  gam.plot <- function(fit, ylab = expression(bold("Predicted probability")), ...){
    plot(fit, xlab = expression(bold("Indicator's positivity (SD's)")), ylab = ylab, shift = coefficients(fit)[1], trans = pnorm, shade = TRUE, shade.col = adjustcolor('dark blue', alpha = 0.2), ylim = c(0, 1), yaxt = "n", col.lab = "black", ...)
    axis(2, seq(0, 1, by = 0.2), las = 2)
    grid(NA, NULL, lty = 1, col = adjustcolor('grey', alpha=0.2))
  }
  
  par(mfrow = c(1, 3), mar = c(3, 3, 2.5, 1/2), mgp = c(1.75, .6, 0))
  gam.plot(fit[[1]], main = "Indicator reported \n in a negative context")
  gam.plot(fit[[2]], main = "Increase in indicator's \n value mentioned", ylab = "")
  gam.plot(fit[[3]], main = "Overall sentiment of \n news is positive", ylab = "")
}






